1 /*
2 Egothor Software License version 1.00
3 Copyright (C) 1997-2004 Leo Galambos.
4 Copyright (C) 2002-2004 "Egothor developers"
5 on behalf of the Egothor Project.
6 All rights reserved.
7
8 This software is copyrighted by the "Egothor developers". If this
9 license applies to a single file or document, the "Egothor developers"
10 are the people or entities mentioned as copyright holders in that file
11 or document. If this license applies to the Egothor project as a
12 whole, the copyright holders are the people or entities mentioned in
13 the file CREDITS. This file can be found in the same location as this
14 license in the distribution.
15
16 Redistribution and use in source and binary forms, with or without
17 modification, are permitted provided that the following conditions are
18 met:
19 1. Redistributions of source code must retain the above copyright
20 notice, the list of contributors, this list of conditions, and the
21 following disclaimer.
22 2. Redistributions in binary form must reproduce the above copyright
23 notice, the list of contributors, this list of conditions, and the
24 disclaimer that follows these conditions in the documentation
25 and/or other materials provided with the distribution.
26 3. The name "Egothor" must not be used to endorse or promote products
27 derived from this software without prior written permission. For
28 written permission, please contact Leo.G@seznam.cz
29 4. Products derived from this software may not be called "Egothor",
30 nor may "Egothor" appear in their name, without prior written
31 permission from Leo.G@seznam.cz.
32
33 In addition, we request that you include in the end-user documentation
34 provided with the redistribution and/or in the software itself an
35 acknowledgement equivalent to the following:
36 "This product includes software developed by the Egothor Project.
37 http://egothor.sf.net/"
38
39 THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
40 WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
41 MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
42 IN NO EVENT SHALL THE EGOTHOR PROJECT OR ITS CONTRIBUTORS BE LIABLE
43 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
44 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
45 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
46 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
47 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
48 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
49 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
50
51 This software consists of voluntary contributions made by many
52 individuals on behalf of the Egothor Project and was originally
53 created by Leo Galambos (Leo.G@seznam.cz).
54 */
55 package org.egothor.stemmer;
56
57 /**
58 * The Diff object generates a patch string.
59 * <p>
60 * A patch string is actually a command to a stemmer telling it how to reduce a
61 * word to its root. For example, to reduce the word teacher to its root teach
62 * the patch string Db would be generated. This command tells the stemmer to
63 * delete the last 2 characters from the word teacher to reach the stem (the
64 * patch commands are applied starting from the last character in order to save
65 */
66 public class Diff {
67 int sizex = 0;
68 int sizey = 0;
69 int net[][];
70 int way[][];
71
72 int INSERT;
73 int DELETE;
74 int REPLACE;
75 int NOOP;
76
77 /**
78 * Constructor for the Diff object.
79 */
80 public Diff() {
81 this(1, 1, 1, 0);
82 }
83
84 /**
85 * Constructor for the Diff object
86 *
87 * @param ins Description of the Parameter
88 * @param del Description of the Parameter
89 * @param rep Description of the Parameter
90 * @param noop Description of the Parameter
91 */
92 public Diff(int ins, int del, int rep, int noop) {
93 INSERT = ins;
94 DELETE = del;
95 REPLACE = rep;
96 NOOP = noop;
97 }
98
99 /**
100 * Apply the given patch string <tt>diff</tt> to the given string <tt>
101 * dest</tt>.
102 *
103 * @param dest Destination string
104 * @param diff Patch string
105 */
106 public static void apply(StringBuilder dest, CharSequence diff) {
107 try {
108
109 if (diff == null) {
110 return;
111 }
112
113 int pos = dest.length() - 1;
114 if (pos < 0) {
115 return;
116 }
117 // orig == ""
118 for (int i = 0; i < diff.length() / 2; i++) {
119 char cmd = diff.charAt(2 * i);
120 char param = diff.charAt(2 * i + 1);
121 int par_num = (param - 'a' + 1);
122 switch (cmd) {
123 case '-':
124 pos = pos - par_num + 1;
125 break;
126 case 'R':
127 dest.setCharAt(pos, param);
128 break;
129 case 'D':
130 int o = pos;
131 pos -= par_num - 1;
132 /*
133 * delete par_num chars from index pos
134 */
135 // String s = orig.toString();
136 // s = s.substring( 0, pos ) + s.substring( o + 1 );
137 // orig = new StringBuffer( s );
138 dest.delete(pos, o + 1);
139 break;
140 case 'I':
141 dest.insert(pos += 1, param);
142 break;
143 }
144 pos--;
145 }
146 } catch (StringIndexOutOfBoundsException x) {
147 // x.printStackTrace();
148 } catch (ArrayIndexOutOfBoundsException x) {
149 // x.printStackTrace();
150 }
151 }
152
153 /**
154 * Construct a patch string that transforms a to b.
155 *
156 * @param a String 1st string
157 * @param b String 2nd string
158 * @return String
159 */
160 public synchronized String exec(String a, String b) {
161 if (a == null || b == null) {
162 return null;
163 }
164
165 int x;
166 int y;
167 int maxx;
168 int maxy;
169 int go[] = new int[4];
170 final int X = 1;
171 final int Y = 2;
172 final int R = 3;
173 final int D = 0;
174
175 /*
176 * setup memory if needed => processing speed up
177 */
178 maxx = a.length() + 1;
179 maxy = b.length() + 1;
180 if ((maxx >= sizex) || (maxy >= sizey)) {
181 sizex = maxx + 8;
182 sizey = maxy + 8;
183 net = new int[sizex][sizey];
184 way = new int[sizex][sizey];
185 }
186
187 /*
188 * clear the network
189 */
190 for (x = 0; x < maxx; x++) {
191 for (y = 0; y < maxy; y++) {
192 net[x][y] = 0;
193 }
194 }
195
196 /*
197 * set known persistent values
198 */
199 for (x = 1; x < maxx; x++) {
200 net[x][0] = x;
201 way[x][0] = X;
202 }
203 for (y = 1; y < maxy; y++) {
204 net[0][y] = y;
205 way[0][y] = Y;
206 }
207
208 for (x = 1; x < maxx; x++) {
209 for (y = 1; y < maxy; y++) {
210 go[X] = net[x - 1][y] + DELETE;
211 // way on x costs 1 unit
212 go[Y] = net[x][y - 1] + INSERT;
213 // way on y costs 1 unit
214 go[R] = net[x - 1][y - 1] + REPLACE;
215 go[D] = net[x - 1][y - 1]
216 + ((a.charAt(x - 1) == b.charAt(y - 1)) ? NOOP : 100);
217 // diagonal costs 0, when no change
218 short min = D;
219 if (go[min] >= go[X]) {
220 min = X;
221 }
222 if (go[min] > go[Y]) {
223 min = Y;
224 }
225 if (go[min] > go[R]) {
226 min = R;
227 }
228 way[x][y] = min;
229 net[x][y] = (short) go[min];
230 }
231 }
232
233 // read the patch string
234 StringBuilder result = new StringBuilder();
235 final char base = 'a' - 1;
236 char deletes = base;
237 char equals = base;
238 for (x = maxx - 1, y = maxy - 1; x + y != 0;) {
239 switch (way[x][y]) {
240 case X:
241 if (equals != base) {
242 result.append("-" + (equals));
243 equals = base;
244 }
245 deletes++;
246 x--;
247 break;
248 // delete
249 case Y:
250 if (deletes != base) {
251 result.append("D" + (deletes));
252 deletes = base;
253 }
254 if (equals != base) {
255 result.append("-" + (equals));
256 equals = base;
257 }
258 result.append('I');
259 result.append(b.charAt(--y));
260 break;
261 // insert
262 case R:
263 if (deletes != base) {
264 result.append("D" + (deletes));
265 deletes = base;
266 }
267 if (equals != base) {
268 result.append("-" + (equals));
269 equals = base;
270 }
271 result.append('R');
272 result.append(b.charAt(--y));
273 x--;
274 break;
275 // replace
276 case D:
277 if (deletes != base) {
278 result.append("D" + (deletes));
279 deletes = base;
280 }
281 equals++;
282 x--;
283 y--;
284 break;
285 // no change
286 }
287 }
288 if (deletes != base) {
289 result.append("D" + (deletes));
290 deletes = base;
291 }
292
293 return result.toString();
294 }
295 }